Aggregating Conveyal Regional Analyses

The purpose of this file is to work through an example of how to aggregate regional analysis results from Conveyal. The example data used here is a regional analysis that measures access to non-emergency healthcare destinations in the MPO region.

The goal

The goal here is to work out how to aggregate results so that we can answer the question: “How does access to basic healthcare services differ between EJ and non-EJ populations?”

# Raster processing
library(tidyverse)
library(sf)
library(stars)
library(mapview)
library(kableExtra)

# INPUTS ####
# read in census geog layers
mpo_census_tracts<- st_read("output/demographic_data.gpkg", layer= "tracts_acs_dec_2020") %>% 
  st_transform(3857)
## Reading layer `tracts_acs_dec_2020' from data source 
##   `C:\GitHub\existing-inequities\output\demographic_data.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 811 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -71.66063 ymin: 42.01446 xmax: -70.57338 ymax: 42.73887
## Geodetic CRS:  NAD83
boundary<-st_read("output/AggregationAreas.gpkg", layer= "MPO_Boundary") %>% 
  st_transform(3857) # needs to be psudo-mercator for raster operations
## Reading layer `MPO_Boundary' from data source 
##   `C:\GitHub\existing-inequities\output\AggregationAreas.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 1 feature and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 186779.3 ymin: 862697.3 xmax: 275996 ymax: 943384.9
## Projected CRS: NAD83 / Massachusetts Mainland
communitytypes<- st_read("output/AggregationAreas.gpkg", layer= "CommunityTypes") %>% st_transform(3857)
## Reading layer `CommunityTypes' from data source 
##   `C:\GitHub\existing-inequities\output\AggregationAreas.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 8 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 186779.3 ymin: 862697.3 xmax: 275996 ymax: 943384.9
## Projected CRS: NAD83 / Massachusetts Mainland
housingsubmarkets <- st_read("output/AggregationAreas.gpkg", layer="HousingSubmarkets") %>% st_transform(3857)
## Reading layer `HousingSubmarkets' from data source 
##   `C:\GitHub\existing-inequities\output\AggregationAreas.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 7 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 186784.2 ymin: 861634.6 xmax: 274511.9 ymax: 943390.9
## Projected CRS: NAD83 / Massachusetts Mainland
# read in geotiff
access_layer<- read_stars("data/ConveyalRuns/HealthCareTests/HealthCare_NonEmergency_AMPeak_TransitBusRT_45min_50pct.geotiff") 
destination <- "Non-emergency Healthcare"
time_period <- "AM Peak"
modes <- "Transit (Bus and Rapid Transit only)"
travel_time <- "45 minutes"

# read in destinations used for conveyal analysis
dest<- st_read("output/DestinationData.gpkg", layer= "healthcare_PT") %>% 
  st_transform(3857)
## Reading layer `healthcare_PT' from data source 
##   `C:\GitHub\existing-inequities\output\DestinationData.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 471 features and 4 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 194609.1 ymin: 869927.6 xmax: 269400.3 ymax: 930973.2
## Projected CRS: NAD83 / Massachusetts Mainland
# note conveyal uses lodes workers home locations as a proxy for population density.
# the workers tiff is in stored in the same grid as the conveyal access output.
# To apply demographics we would still need to go through a process similar to option 2
# read in conveyal workers geotiff
# workers <- read_stars("data/")

First, crop the raster to the boundary extent.

# CROP Access Raster to the MPO Boundary
access_layer_crop <- st_crop(access_layer, boundary)
ggplot()+
  geom_stars(data=access_layer_crop, alpha = .7)+
  # geom_sf(data=access_contours, aes(color = value),size=.8, show.legend = F)+
  # geom_contour(data=access_layer_crop)+
  geom_sf(data=boundary,size=1,color="light gray", fill= NA)+
  coord_sf()+
  scale_fill_viridis_c(option = "D", na.value = "transparent",
                       name = paste0(destination, "\n Opportunities Accessible"))+
  # scale_color_viridis_c(option = "D", na.value = "transparent",
  #                      name = paste0(destination, "\n Opportunities Accessible"))+
  ggtitle(paste0("Access to ", destination, "\nwith ",
                                    modes, "\nin ", travel_time))+
  labs(caption= paste0("Time period: ", time_period))+
  theme_void()

Option 1: Aggregate to the census geography

Find the average number of destinations accessible within a census geography. Then multiply average opportunities accessible from the tract by demographic population.

# Option 1: find the average number of destinations accessible within a census tract

# This process finds the average number of opportunities accessible for cells that are within a given census tract geometry. Then multiplies the number of people from a given population in the tract by the average number of opportunities accessible to them.
# http://132.72.155.230:3838/r/combining-rasters-and-vector-layers.html#extracting-to-polygons-single-band

opps_by_tract<- access_layer_crop %>% 
  aggregate(mpo_census_tracts, mean, na.rm=T) %>% 
  st_as_sf() %>% 
  rename(opps= 1) # rename the avg opportunities column to "opps"

# join tract level demographic data by matching tract geog used to aggregate
opps_by_tract_demo<- mpo_census_tracts %>% 
  st_join(opps_by_tract, join = st_equals) %>% 
  mutate(
    people_opps = round(pop_dec*opps),
    opps_per_person = ifelse(pop_dec  == 0, 0, round(opps/pop_dec, 3)),
    min_opps = round(minority*opps),
    nonmin_opps = round(nonminority*opps),
    lowinc_opps = round(lowinc*opps),
    nonlowinc_opps = round(nonlowinc*opps))%>%
  mutate(id= row_number())
  #select(GEOID, ends_with("_opps"))
opps_by_tract_demo %>% 
  select(GEOID, opps, pop_dec, ends_with("_opps")) %>% 
  st_drop_geometry() %>% 
  arrange(desc(min_opps)) %>% 
  head(10) %>% 
  kbl() %>% 
  kable_styling()
GEOID opps pop_dec people_opps min_opps nonmin_opps lowinc_opps nonlowinc_opps
25025070202 191.50000 5460 1045590 832657 212933 570953 474637
25025010405 150.85714 6853 1033824 490958 542866 771291 262533
25025092000 93.45455 5180 484095 444724 39371 152855 331240
25025080601 140.22222 4732 663532 429054 234477 427771 235760
25025080500 135.75000 3288 446346 422834 23512 225046 221300
25025080801 130.62500 4282 559336 412482 146855 368989 190347
25025082100 88.14286 5224 460458 404728 55731 248495 211964
25017359400 133.21429 6754 899729 404574 495156 171445 728284
25025090100 80.44444 5171 415978 399386 16592 220728 195250
25025170702 66.30769 7995 530130 397274 132856 255179 274951
mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "min_opps", layer.name = "Minority Opps")+
  mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "nonmin_opps", layer.name = "Nonminority Opps")+
  mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "lowinc_opps", layer.name = "Low-Income Opps")+
  mapview(select(opps_by_tract_demo,GEOID, NAME, ends_with("_opps")), zcol = "nonlowinc_opps", layer.name = "Non-low-income Opps")

To summarize region-wide:

demo_summary_by_tract <- opps_by_tract_demo %>%
  st_drop_geometry() %>% 
  ungroup() %>% # or join and group by aggregation geometry instead
  summarize(people_opps = sum(people_opps, na.rm = T),
            min_opps = sum(min_opps, na.rm= T),
            nonmin_opps = sum(nonmin_opps, na.rm=T),
            lowinc_opps = sum(lowinc_opps, na.rm=T),
            nonlowinc_opps = sum(nonlowinc_opps, na.rm=T))

pct_opps_min_tract = demo_summary_by_tract$min_opps[1]/(demo_summary_by_tract$min_opps[1]+demo_summary_by_tract$nonmin_opps)
pct_opps_lowinc_tract = demo_summary_by_tract$lowinc_opps[1]/(demo_summary_by_tract$lowinc_opps[1]+demo_summary_by_tract$nonlowinc_opps)
demo_summary_by_tract %>% 
  kbl() %>% 
  kable_styling()
people_opps min_opps nonmin_opps lowinc_opps nonlowinc_opps
105630671 49798208 55828738 31591435 74035183

Note some people-opps not assigned to demographics, because of ACS demo gaps and maybe treatment of region edges. Overall, minority people have 47% of region-wide healthcare opportunities accessible by transit (Bus and RT). And low-income people have `30% of region-wide healthcare opportunities accessible by transit (Bus and RT).

Option 2: Pull demographics down to the raster cell

# Option 2 allocate opportunities from each raster cell demo populations using areas overlap and 
tracts <- mpo_census_tracts %>% 
  mutate(area_tract = unclass(st_area(geom)))
# total_pop <- sum(tracts$pop_dec, na.rm=T)

access_crop_sf <- access_layer_crop %>% 
  st_as_sf() %>%  # convert raster to sf object (each row is a raster cell as vector)
  rename(opps = 1) %>% 
  mutate(area_cell = unclass(st_area(geometry)),
         cell_id = row_number())

#summary(access_crop_sf$opps)
# total_opps <- sum(access_crop_sf$opps)
# avg_opps <- mean(access_crop_sf$opps)

opps_by_cell_demo<- access_crop_sf %>% 
  st_intersection(select(tracts, 
                         GEOID, pop_dec, 
                         percent_min, percent_nonmin,
                         percent_lowinc, percent_nonlowinc,
                         area_tract)) %>% 
  mutate(area_int = unclass(st_area(geometry))) %>% 
  mutate(pct_overlap_tract = area_int/area_tract,
         pct_overlap_cell= area_int/area_cell) %>% 
  mutate(
    pop_part= pop_dec*pct_overlap_tract,
    opps_part = opps*pct_overlap_cell,
    pop_opps = pop_part*opps_part,
    min_opps = pop_opps*percent_min,
    nonmin_opps = pop_opps*percent_nonmin,
    lowinc_opps = pop_opps*percent_lowinc,
    nonlowinc_opps = pop_opps*percent_nonlowinc) %>% 
  # group back into whole cells
  group_by(cell_id) %>% 
  summarise(
    pop_opps = sum(pop_opps, na.rm=T),
    min_opps = sum(min_opps, na.rm=T),
    nonmin_opps = sum(nonmin_opps, na.rm=T),
    lowinc_opps = sum(lowinc_opps, na.rm=T),
    nonlowinc_opps = sum(nonlowinc_opps, na.rm=T),
    GEOIDs = paste0(unique(unlist(GEOID)), collapse = ' '),
    geometry= st_union(geometry)) %>% 
  st_as_sf()
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
opps_by_cell_demo %>% 
  st_drop_geometry() %>% 
  arrange(desc(min_opps)) %>% 
  head() %>% 
  kbl() %>% 
  kable_styling()
cell_id pop_opps min_opps nonmin_opps lowinc_opps nonlowinc_opps GEOIDs
34056 286868.8 228443.53 58425.31 156642.10 130226.74 25025070103 25025070104 25025070202
34732 232862.9 114081.17 118781.72 133279.35 99583.54 25025010403 25025010500 25025010405
34286 140686.1 110605.31 30080.81 75328.86 65357.26 25025070103 25025070202
34285 147745.8 97868.66 49877.12 77238.36 70507.42 25025070201 25025070202
35182 210710.6 93953.55 116757.04 139024.35 71686.23 25025010404 25025010405
34055 139102.5 87598.66 51503.83 71913.44 67189.06 25025070201 25025070104 25025981700 25025070102 25025070202

To focus just on the cells associated with a specific tract:

opps_by_cell_focus <- opps_by_cell_demo %>% 
  filter(grepl( "25025071101", GEOIDs))
mapview(opps_by_cell_focus)
demo_summary_by_cell <- opps_by_cell_demo %>% 
  st_drop_geometry() %>% 
  ungroup() %>% # or join and group by aggregation geometry instead
  summarize(people_opps = sum(pop_opps, na.rm = T),
            min_opps = sum(min_opps, na.rm= T),
            nonmin_opps = sum(nonmin_opps, na.rm=T),
            lowinc_opps = sum(lowinc_opps, na.rm=T),
            nonlowinc_opps = sum(nonlowinc_opps, na.rm=T))

pct_opps_min_cell = demo_summary_by_tract$min_opps[1]/(demo_summary_by_tract$min_opps[1]+demo_summary_by_tract$nonmin_opps)
pct_opps_lowinc_cell = demo_summary_by_tract$lowinc_opps[1]/(demo_summary_by_tract$lowinc_opps[1]+demo_summary_by_tract$nonlowinc_opps)
demo_summary_by_cell %>% 
  kbl() %>% 
  kable_styling()
people_opps min_opps nonmin_opps lowinc_opps nonlowinc_opps
78416142 36943053 41470286 23014335 55398710

Note some people-opps not assigned to demographics, because of ACS demo gaps and maybe treatment of region edges. Overall, minority people have 47% of region-wide healthcare opportunities accessible by transit (Bus and RT). And low-income people have `30% of region-wide healthcare opportunities accessible by transit (Bus and RT).

Note: Percent of opportunities is the same for the two options, but the total people opps are not.

# test <- opps_by_cell_demo %>% 
#   select(-cell_id,-GEOIDs) %>% 
#   st_as_stars()
# 
# ggplot()+
#   geom_stars(data=test$pop_opps, alpha = .7)+
#   # geom_sf(data=access_contours, aes(color = value),size=.8, show.legend = F)+
#   # geom_contour(data=access_layer_crop)+
#   # geom_sf(data=boundary,size=1,color="light gray", fill= NA)+
#   # coord_sf()+
#   # scale_fill_viridis_c(option = "D", na.value = "transparent",
#   #                      name = paste0(destination, "\n Opportunities Accessible"))+
#   # scale_color_viridis_c(option = "D", na.value = "transparent",
#   #                      name = paste0(destination, "\n Opportunities Accessible"))+
#   ggtitle(paste0("Access to ", destination, "\nwith ",
#                                     modes, "\nin ", travel_time))+
#   # facet_wrap(~band)+
#   coord_equal() +
#     facet_wrap(~band) +
#     theme_void() +
#     scale_x_discrete(expand=c(0,0))+
#     scale_y_discrete(expand=c(0,0))+
#   # 
#   # labs(caption= paste0("Time period: ", time_period))+
#   theme_void()